############################################################################## 
# Prepared by Vladimir Chlouba 
# University of Richmond
# vchlouba@richmond.edu
# December 2024
##############################################################################
# The Precolonial Origins of African Nationalism  
# Replication R Script
##############################################################################

# loading the requisite R packages
library(ggplot2)
library(foreign)
library(data.table)
library(stargazer)
library(lme4)
library(arm)
library(memisc)
library(vegan)
library(MASS)
library(nnet)
library(spikeSlabGAM)
library(speedglm)
library(biglm)
library(ordinal)
library(erer)
library(mlogit)
library(scales)
library(nnet)
library(haven)
library(lfe)
library(rgdal)
library(sp)
library(raster)
library(maptools)
library(rgeos)
library(lattice)
library(spdep)
library(cshapes)
library(countrycode)
library(margins)
library(ggmap)
library(dplyr)
library(lmtest)
library(sandwich)
library(fixest)
library(ivmodel)
library(dotwhisker)
library(interplot)
library(gridExtra)


rm(list = ls())
#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

######################################################################
# Table 1: Main Results
######################################################################

# loading the requisite dataset (adjust path)
main.data <- read.csv("chlouba_2025_replication_data.csv")

# creating variable lists
iv_list <- c("centralization+indirect_rule","centralization*indirect_rule")

control_list_pre <- "wateraccess+soil_quality+abslat+mnt2000+avgtemp+avgprec+meanrh+malaria_index+prop_tropics+ln_popd_murdock+coast+river+female+age+age2+col_britain+intensive" 
control_list_post <- "+capdist+bdist1+conflict+lpop+loglightsavg+polity+land_area+slave_exports+education+employed+urban+ef+war"

dv_list <- c("natid_ethnicid", "RSNI_dummy")


# model formulas
fmla1 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[1], "+"),
                          paste(control_list_pre),
                          paste("| round | 0 | country")))

fmla2 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[1], "+"),
                          paste(control_list_pre),
                          paste(control_list_post),
                          paste("| round | 0 | country")))

fmla3 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[2], "+"),
                          paste(control_list_pre),
                          paste("| round | 0 | country")))

fmla4 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[2], "+"),
                          paste(control_list_pre),
                          paste(control_list_post),
                          paste("| round | 0 | country")))

fmla5 <- as.formula(paste(paste(dv_list[2],"~"),
                          paste(iv_list[1], "+"),
                          paste(control_list_pre),
                          paste("| round | 0 | country")))

fmla6 <- as.formula(paste(paste(dv_list[2],"~"),
                          paste(iv_list[1], "+"),
                          paste(control_list_pre),
                          paste(control_list_post),
                          paste("| round | 0 | country")))

fmla7 <- as.formula(paste(paste(dv_list[2],"~"),
                          paste(iv_list[2], "+"),
                          paste(control_list_pre),
                          paste("| round | 0 | country")))

fmla8 <- as.formula(paste(paste(dv_list[2],"~"),
                          paste(iv_list[2], "+"),
                          paste(control_list_pre),
                          paste(control_list_post),
                          paste("| round | 0 | country")))


# estimate coefficients
m1 <- felm(fmla1, data=main.data)
m2 <- felm(fmla2, data=main.data)
m3 <- felm(fmla3, data=main.data)
m4 <- felm(fmla4, data=main.data)
m5 <- felm(fmla5, data=main.data)
m6 <- felm(fmla6, data=main.data)
m7 <- felm(fmla7, data=main.data)
m8 <- felm(fmla8, data=main.data)

# summaries
summary(m1)
summary(m2)
summary(m3)
summary(m4)
summary(m5)
summary(m6)
summary(m7)
summary(m8)

# LaTeX code for table
stargazer(m1, m2, m3, m4, m5, m6, m7, m8, no.space = T, digits = 2)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

# visualization of interactions from Models 4 and 8

# Huber-White heteroskedasticity-robust standard error calculation and table 
# generation code for lm and glm models in R. Written by Joshua Gubler ~  http://joshuagubler.com.
robustse.f <- function(model, cluster, df_correction) {
  
  require(sandwich)
  require(lmtest)
  require(multiwayvcov)
  if(missing(cluster)) {
    name <- deparse(substitute(model))
    modelname <- paste(name,"rob",sep=".")
    model$se <- coeftest(model, vcov=vcovHC(model,"HC1"))[,2]
    model$vcovHC <- vcovHC(model,"HC1")
    assign(modelname,model,envir = .GlobalEnv)
    coeftest(model, vcov=vcovHC(model,"HC1"))
  } else {
    name <- deparse(substitute(model))
    modelname <- paste(name,"rob",sep=".")
    vcovCL <- cluster.vcov(model, cluster, df_correction = df_correction)
    model$vcovCL <- vcovCL
    modelname <- paste(name,"clustrob",sep=".")
    model$se <- coeftest(model, vcovCL)[,2]
    assign(modelname,model,envir = .GlobalEnv)
    coeftest(model, vcovCL)
  }
}


# estimating model
Model.1 <- lm(natid_ethnicid ~ centralization * indirect_rule + col_britain + wateraccess + 
                soil_quality + abslat + mnt2000 + avgtemp + avgprec + meanrh + malaria_index + 
                prop_tropics + ln_popd_murdock + coast + river + female + 
                age + age2 + capdist + bdist1 + conflict + lpop + loglightsavg + 
                polity + land_area + slave_exports + intensive + education + 
                employed + urban + ef + war + as.factor(round), data = main.data)

# generating clustered standard errors
robustse.f(Model.1, cluster = main.data$country, df_correction = F)

# interaction plot
require(ggthemes)
int1 <- interplot(Model.1.clustrob, 'centralization', 'indirect_rule', hist = F, point = T, 
                  esize = 1, ci = 0.95) +
  geom_point(size = 3, color = "black") +
  ylim(-0.225, 0.225) +
  scale_x_continuous(breaks = c(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)) +
  xlab('Indirect rule')+
  ylab('Centralization')+
  ggtitle('National Over Ethnic Identity (1-5)')+ 
  theme_bw() + 
  geom_hline(yintercept = 0, linetype = 5, colour = "red")

int1


# estimating model
Model.2 <- lm(RSNI_dummy ~ centralization * indirect_rule + col_britain + wateraccess + 
                soil_quality + abslat + mnt2000 + avgtemp + avgprec + meanrh + malaria_index + 
                prop_tropics + ln_popd_murdock + coast + river + female + 
                age + age2 + capdist + bdist1 + conflict + lpop + loglightsavg + 
                polity + land_area + slave_exports + intensive + education + 
                employed + urban + ef + war + as.factor(round), data = main.data)

# generating clustered standard errors
robustse.f(Model.2, cluster = main.data$country, df_correction = F)

# interaction plot
require(ggthemes)
int2 <- interplot(Model.2.clustrob, 'centralization', 'indirect_rule', hist = F, point = T, 
                  esize = 1, ci = 0.95) +
  geom_point(size = 3, color = "black") +
  ylim(-0.225, 0.225) +
  scale_x_continuous(breaks = c(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)) +
  xlab('Indirect rule')+
  ylab('Centralization')+
  ggtitle('National Over Ethnic Identity (0/1)')+
  theme_bw() + 
  geom_hline(yintercept = 0, linetype = 5, colour = "red")

int2


# arranging the resultant plots
grid.arrange(int1, int2, ncol=2)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

######################################################################
# Table 2: Divided Groups in West Africa
######################################################################

# subsetting for West Africa
split.groups <- subset(main.data, main.data$west==1)

# creating variable lists
iv_list <- c("centralization+col_britain","centralization*col_britain")

control_list_pre <- "wateraccess+soil_quality+abslat+mnt2000+avgtemp+avgprec+meanrh+malaria_index+prop_tropics+ln_popd_murdock+coast+river+female+age+age2+intensive" 
control_list_post <- "+capdist+bdist1+conflict+lpop+loglightsavg+polity+land_area+slave_exports+education+employed+urban+ef+war"

dv_list <- c("natid_ethnicid", "RSNI_dummy")


# model formulas
fmla1.west <- as.formula(paste(paste(dv_list[1],"~"),
                               paste(iv_list[2], "+"),
                               paste(control_list_pre),
                               paste("| round | 0 | country")))

fmla2.west <- as.formula(paste(paste(dv_list[1],"~"),
                               paste(iv_list[2], "+"),
                               paste(control_list_pre),
                               paste(control_list_post),
                               paste("| round | 0 | country")))

fmla3.west <- as.formula(paste(paste(dv_list[1],"~"),
                               paste(iv_list[2], "+"),
                               paste(control_list_pre),
                               paste(control_list_post),
                               paste("| round + country + NAME | 0 | country")))

fmla4.west <- as.formula(paste(paste(dv_list[2],"~"),
                               paste(iv_list[2], "+"),
                               paste(control_list_pre),
                               paste("| round | 0 | country")))

fmla5.west <- as.formula(paste(paste(dv_list[2],"~"),
                               paste(iv_list[2], "+"),
                               paste(control_list_pre),
                               paste(control_list_post),
                               paste("| round | 0 | country")))

fmla6.west <- as.formula(paste(paste(dv_list[2],"~"),
                               paste(iv_list[2], "+"),
                               paste(control_list_pre),
                               paste(control_list_post),
                               paste("| round + country + NAME | 0 | country")))


# estimate coefficients
model1 <- felm(fmla1.west, data=split.groups)
model2 <- felm(fmla2.west, data=split.groups)
model3 <- felm(fmla3.west, data=split.groups)
model4 <- felm(fmla4.west, data=split.groups)
model5 <- felm(fmla5.west, data=split.groups)
model6 <- felm(fmla6.west, data=split.groups)


# summaries
summary(model1)
summary(model2)
summary(model3)
summary(model4)
summary(model5)
summary(model6)

# LaTeX code for table
stargazer(model1, model2, model3, model4, model5, model6, no.space = T, digits = 2)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

######################################################################
# Interplots: Mechanisms 
######################################################################

# cultural persistence (ethnic stayers vs. ethnic movers)
movers <- subset(main.data, main.data$resident==0)
stayers<- subset(main.data, main.data$resident==1)

# estimating model
Model.3 <- lm(natid_ethnicid ~ centralization * indirect_rule + col_britain + wateraccess + 
                soil_quality + abslat + mnt2000 + avgtemp + avgprec + meanrh + malaria_index + 
                prop_tropics + ln_popd_murdock + coast + river + female + 
                age + age2 + capdist + bdist1 + conflict + lpop + loglightsavg + 
                polity + land_area + slave_exports + intensive + education + 
                employed + urban + ef + war + as.factor(round), data = stayers)

# generating clustered standard errors
robustse.f(Model.3, cluster = stayers$country, df_correction = F)

# interaction plot
require(ggthemes)
int3 <- interplot(Model.3.clustrob, 'centralization', 'indirect_rule', hist = F, point = T, 
                  esize = 1, ci = 0.95) +
  geom_point(size = 3, color = "black") +
  ylim(-0.325, 0.45) +
  scale_x_continuous(breaks = c(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)) +
  xlab('Indirect rule')+
  ylab('Centralization')+
  ggtitle('Ethnic Stayers')+ 
  theme_bw() + 
  geom_hline(yintercept = 0, linetype = 5, colour = "red")

int3


# estimating model
Model.4 <- lm(natid_ethnicid ~ centralization * indirect_rule + col_britain + wateraccess + 
                soil_quality + abslat + mnt2000 + avgtemp + avgprec + meanrh + malaria_index + 
                prop_tropics + ln_popd_murdock + coast + river + female + 
                age + age2 + capdist + bdist1 + conflict + lpop + loglightsavg + 
                polity + land_area + slave_exports + intensive + education + 
                employed + urban + ef + war + as.factor(round), data = movers)

# generating clustered standard errors
robustse.f(Model.4, cluster = movers$country, df_correction = F)

# interaction plot
require(ggthemes)
int4 <- interplot(Model.4.clustrob, 'centralization', 'indirect_rule', hist = F, point = T, 
                  esize = 1, ci = 0.95, ercolor = "gray43") +
  geom_point(size = 3, color = "gray43") +
  ylim(-0.325, 0.45) +
  scale_x_continuous(breaks = c(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)) +
  xlab('Indirect rule')+
  ylab('Centralization')+
  ggtitle('Ethnic Movers')+ 
  theme_bw() + 
  geom_hline(yintercept = 0, linetype = 5, colour = "red")

int4


# arranging the resultant plots
grid.arrange(int3, int4, ncol=2)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

# institutional persistence (contact with traditional chiefs)
no.contact <- subset(main.data, main.data$contact_trad==0)
contact <- subset(main.data, main.data$contact_trad==1)


# estimating model
Model.5 <- lm(natid_ethnicid ~ centralization * indirect_rule + col_britain + wateraccess + 
                soil_quality + abslat + mnt2000 + avgtemp + avgprec + meanrh + malaria_index + 
                prop_tropics + ln_popd_murdock + coast + river + female + 
                age + age2 + capdist + bdist1 + conflict + lpop + loglightsavg + 
                polity + land_area + slave_exports + intensive + education + 
                employed + urban + ef + war + as.factor(round), data = contact)

# generating clustered standard errors
robustse.f(Model.5, cluster = contact$country, df_correction = F)

# interaction plot
require(ggthemes)
int5 <- interplot(Model.5.clustrob, 'centralization', 'indirect_rule', hist = F, point = T, 
                  esize = 1, ci = 0.95) +
  geom_point(size = 3, color = "black") +
  ylim(-0.175, 0.180) +
  scale_x_continuous(breaks = c(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)) +
  xlab('Indirect rule')+
  ylab('Centralization')+
  ggtitle('Contact with chief')+ 
  theme_bw() + 
  geom_hline(yintercept = 0, linetype = 5, colour = "red")

int5


# estimating model
Model.6 <- lm(natid_ethnicid ~ centralization * indirect_rule + col_britain + wateraccess + 
                soil_quality + abslat + mnt2000 + avgtemp + avgprec + meanrh + malaria_index + 
                prop_tropics + ln_popd_murdock + coast + river + female + 
                age + age2 + capdist + bdist1 + conflict + lpop + loglightsavg + 
                polity + land_area + slave_exports + intensive + education + 
                employed + urban + ef + war + as.factor(round), data = no.contact)

# generating clustered standard errors
robustse.f(Model.6, cluster = no.contact$country, df_correction = F)

# interaction plot
require(ggthemes)
int6 <- interplot(Model.6.clustrob, 'centralization', 'indirect_rule', hist = F, point = T, 
                  esize = 1, ci = 0.95, ercolor = "gray43") +
  geom_point(size = 3, color = "gray43") +
  ylim(-0.175, 0.180) +
  scale_x_continuous(breaks = c(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)) +
  xlab('Indirect rule')+
  ylab('Centralization')+
  ggtitle('No contact with chief')+ 
  theme_bw() + 
  geom_hline(yintercept = 0, linetype = 5, colour = "red")

int6


# arranging the resultant plots
grid.arrange(int5, int6, ncol=2)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

# institutional persistence (constitutional provisions)
no.cons <- subset(main.data, main.data$TPI_count_b<=2)
cons <- subset(main.data, main.data$TPI_count_b>2)


# estimating model
Model.7 <- lm(natid_ethnicid ~ centralization * indirect_rule + col_britain + wateraccess + 
                soil_quality + abslat + mnt2000 + avgtemp + avgprec + meanrh + malaria_index + 
                prop_tropics + ln_popd_murdock + coast + river + female + 
                age + age2 + capdist + bdist1 + conflict + lpop + loglightsavg + 
                polity + land_area + slave_exports + intensive + education + 
                employed + urban + ef  + war + as.factor(round), data = cons)

# generating clustered standard errors
robustse.f(Model.7, cluster = cons$country, df_correction = F)

# interaction plot
require(ggthemes)
int7 <- interplot(Model.7.clustrob, 'centralization', 'indirect_rule', hist = F, point = T, 
                  esize = 1, ci = 0.95) +
  geom_point(size = 3, color = "black") +
  ylim(-0.15, 0.24) +
  scale_x_continuous(breaks = c(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)) +
  xlab('Indirect rule')+
  ylab('Centralization')+
  ggtitle('Constitutional provisions above median')+ 
  theme_bw() + 
  geom_hline(yintercept = 0, linetype = 5, colour = "red")

int7


# estimating model
Model.8 <- lm(natid_ethnicid ~ centralization * indirect_rule + col_britain + wateraccess + 
                soil_quality + abslat + mnt2000 + avgtemp + avgprec + meanrh + malaria_index + 
                prop_tropics + ln_popd_murdock + coast + river + female + 
                age + age2 + capdist + bdist1 + conflict + lpop + loglightsavg + 
                polity + land_area + slave_exports + intensive + education + 
                employed + urban + ef + war + as.factor(round), data = no.cons)

# generating clustered standard errors
robustse.f(Model.8, cluster = no.cons$country, df_correction = F)

# interaction plot
require(ggthemes)
int8 <- interplot(Model.8.clustrob, 'centralization', 'indirect_rule', hist = F, point = T, 
                  esize = 1, ci = 0.95, ercolor = "gray43") +
  geom_point(size = 3, color = "gray43") +
  scale_x_continuous(breaks = c(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)) +
  ylim(-0.15, 0.24) +
  xlab('Indirect rule')+
  ylab('Centralization')+
  ggtitle('Constitutional provisions below median')+ 
  theme_bw() + 
  geom_hline(yintercept = 0, linetype = 5, colour = "red")

int8


# arranging the resultant plots
grid.arrange(int7, int8, ncol=2)

# arranging all four plots 
grid.arrange(int3, int4, int5, int6, int7, int8, ncol=2)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~
